主题 工具
Domain Knownledge Machine Learning Badge Computer Science Development Badge
地址 1 2

1 table

1.1 gt表

library(tidyverse)
library(gt)
library(gtExtras)

学习gt表网站

1.1.1 表格1

mtcars %>% 
  head() %>% 
  gt() %>% 
  gt_theme_538()
mpg cyl disp hp drat wt qsec vs am gear carb
21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
18.1 6 225 105 2.76 3.460 20.22 1 0 3 1

1.1.2 表格2

mtcars %>% 
  head() %>% 
  gt() %>% 
  gt_theme_espn() %>% # gt_theme_nytimes() %>% 
  tab_header(title = "ESPN 风格表格")
ESPN 风格表格
mpg cyl disp hp drat wt qsec vs am gear carb
21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
18.1 6 225 105 2.76 3.460 20.22 1 0 3 1

1.1.3 颜色表格

高亮行 高亮 mpg, wt 列的 1-5 行。 gt_highlight_rows

mtcars %>% 
  head() %>% 
  gt() %>% 
  gt_highlight_cols(c(cyl, vs:carb),
                    fill = "lightblue", 
                    alpha = .5)%>%
  gt_highlight_rows(1:5, columns = c(mpg, wt)) %>% 
  gt_color_rows(hp,palette = "ggsci::blue_material")
mpg cyl disp hp drat wt qsec vs am gear carb
21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
  #palette = c("darkred", "orange")

1.1.4 增加图像

mtcars %>%
  select(cyl:wt, mpg) %>% 
  head() %>%
  gt() %>%
  gt_plt_bar(column = mpg, keep_column = TRUE, width = 35)
cyl disp hp drat wt mpg mpg
6 160 110 3.90 2.620 21.0
6 160 110 3.90 2.875 21.0
4 108 93 3.85 2.320 22.8
6 258 110 3.08 3.215 21.4
8 360 175 3.15 3.440 18.7
6 225 105 2.76 3.460 18.1
mtcars %>%
   head() %>%
   select(cyl, mpg) %>%
   mutate(mpg_pct_max = round(mpg/max(mpg) * 100, digits = 2),
                 mpg_scaled = mpg/max(mpg) * 100) %>%
   mutate(mpg_unscaled = mpg) %>%
   gt() %>%
   gt_plt_bar_pct(column = mpg_scaled, scaled = TRUE) %>%
   gt_plt_bar_pct(column = mpg_unscaled, scaled = FALSE,
                  fill = "blue", background = "lightblue") %>%
   cols_align("center", contains("scale")) %>%
   cols_width(4 ~ px(125),
              5 ~ px(125))
cyl mpg mpg_pct_max mpg_scaled mpg_unscaled
6 21.0 92.11
6 21.0 92.11
4 22.8 100.00
6 21.4 93.86
8 18.7 82.02
6 18.1 79.39

1.1.5 自定义图片

a=tibble(a=c(1,2),b=c(2,3),who=c('lp1',"lp2"))
a$b=c("./www/lp_lh1.png","./www/lp_lh2.png")
  a %>% 
  gt(groupname_col = "team_conf") %>%
  # gt_merge_stack(col1 = team_nick, col2 = team_division) %>%
  gt_img_rows(b)
a b who
1 lp1
2 lp2

1.1.6 美元

start_date <- "2010-06-07"
end_date <- "2010-06-14"
sp500 |>
  dplyr::filter(date >= start_date & date <= end_date) |>
  dplyr::select(-adj_close) |>
  gt() |>
  tab_header(
    title = "S&P 500",
    subtitle = glue::glue("{start_date} to {end_date}")
  ) |>
  fmt_currency() |>
  fmt_date(columns = date, date_style = "wd_m_day_year") |>
  fmt_number(columns = volume, suffixing = TRUE)
S&P 500
2010-06-07 to 2010-06-14
date open high low close volume
Mon, Jun 14, 2010 $1,095.00 $1,105.91 $1,089.03 $1,089.63 4.43B
Fri, Jun 11, 2010 $1,082.65 $1,092.25 $1,077.12 $1,091.60 4.06B
Thu, Jun 10, 2010 $1,058.77 $1,087.85 $1,058.77 $1,086.84 5.14B
Wed, Jun 9, 2010 $1,062.75 $1,077.74 $1,052.25 $1,055.69 5.98B
Tue, Jun 8, 2010 $1,050.81 $1,063.15 $1,042.17 $1,062.00 6.19B
Mon, Jun 7, 2010 $1,065.84 $1,071.36 $1,049.86 $1,050.47 5.47B

1.2 2

2 Plots

2.1 箱线图/小提琴图

p1<-mtcars %>% mutate(am=factor(am),
                  vs=factor(vs)) %>% 
  ggplot(aes(x=vs,y=mpg,fill=am))+
  geom_violin(col="white",trim = FALSE)+
  geom_boxplot(width=.3,position=position_dodge(width=0.9))+
  theme_bw()+theme(legend.position = c(0.15,0.85))+
  guides(alpha='none')+
  labs(x='Name of person',y='Heart rate',title = "Performance on lie detector test")+
  scale_fill_brewer(palette="Set2")
p1

2.2 p2

mtcars %>% count(vs,cyl) %>% mutate(name=c("a","b","c","d","e")) %>% 
  mutate_at(c('vs','cyl'),as.factor) %>% 
  ggplot(aes(x=vs,y=n,fill=cyl,label=name))+
  geom_bar(stat = "identity",
           position = 'fill',col=1)+
  geom_text(aes(label=name),size=4,vjust=0.5,position = 'fill')+
  scale_fill_brewer(palette="Set2")+
  # scale_fill_manual(values = heat.colors(7))+
  # scale_fill_manual(values = terrain.colors(7))+
  labs(title="Facebook theme",caption = "made by chz")+
  theme_minimal()+
  theme(plot.title = element_text(hjust = 0.5,#居中
                                  vjust =0,#上下
                                  color = 'red',
                                  face = "italic")
  )

2.3 点线图

dd<-tibble(name=rep(letters[1:10],2),health=rnorm(20,10,1),time=rep(c('a','n'),each=10))
dd%>%ggplot(aes(x= health, y= name)) + 
  geom_line(aes(group = name))+geom_point(aes(fill=time),shape = 21, size = 1)+
  labs(title="Changes in health on time",x="health", y="name")+
  theme(axis.text.y = element_text(size = 5))

2.4 树图

library(ggplot2) 
library(treemapify)

ggplot(G20, aes(area = gdp_mil_usd, fill = hdi,label = country)) + 
  geom_treemap()+
  geom_treemap_text(fontface = "italic", colour = "red",
                    place = "centre",grow = TRUE,alpha=.6)+
  scale_fill_distiller(palette="Reds")

# 其中place参数控制每一个方块中标签相对于四周的位置,
# grow则控制标签是否与方块大小自适应(呈大致比例放大缩小)

# 次级分组(亚群):
ggplot(G20, aes(area = gdp_mil_usd, fill = hdi, label = country,subgroup = region)) +
  geom_treemap() +
  geom_treemap_subgroup_border() +
  geom_treemap_subgroup_text(place = "centre", grow = T,
                             alpha = 0.8, colour ="black", fontface = "italic", min.size = 0) +
  geom_treemap_text(colour = "red", place = "topleft", reflow = T,alpha=.5)+
  scale_fill_distiller(palette="Reds")

3 仪表盘

3.1 1

source("gba-464-A5-laser-tag-data.R")#放在同一文件夹
#预处理数据
n=8#比赛场数
scores_red=matrix(0,n,length(red_player_scores_game_1))#名字作为列名,行代表场次存放比赛成绩
scores_green=matrix(0,n,length(green_player_scores_game_1))
for(i in 1:n){
  sco_red=paste0("red_player_scores_game_",i)
  sco_green=paste0("green_player_scores_game_",i)
  scores_red[i,]=get(sco_red)
  scores_green[i,]=get(sco_green)
}
colnames(scores_green)=red_team_players
colnames(scores_red)=green_team_players
scores_green;scores_red#作为下列函数输入参数
##      Einstein  Bond Spock  Noob Snake Pineapple  Kirk  Worf  Bean Mabel
## [1,]    24548 36897 37525 56700 32273     33940 20613 28572 33729 35685
## [2,]    39750 44572  6245 36579 37148     31459 46677 35618 35141 35239
## [3,]    36341 37177 38622 42036 23647     45705 36379 26774 23467 33767
## [4,]    29791 22841 36018 39865 20988     31962 13360 31564 28691 33899
## [5,]    17418 35085 29004 11515 25331     26541 32692 33534 42084 23517
## [6,]    38350  9773 40169 36308 24686     36011 29035 21147 43224 36988
## [7,]    42046 31046 44619 25441 35483     36049 19678 25663 38857 36171
## [8,]    37927 27397 33541 52990 15259     29571 31181 22372 26975 34130
##      Cupcake Boss Level Pudding Watermelon Ham Sandwich Hot Sauce Jelly Bean
## [1,]   32645      43254   30085      27778        34794     22655      32973
## [2,]   44130      40428   20832      34975        36619     36397      31588
## [3,]   32163      53576   26532      28946        11174     11714      27134
## [4,]   21929      49045   38619      25720        22744     53698      30021
## [5,]   55633      24876   32801      20512        47961     23739      37963
## [6,]   30177      19649   31110      15494        33072     42076      41820
## [7,]   31759      31375   31975      24144        24347     30379      36231
## [8,]   35060      15172   42249      17547        12911     17610      19106
##      Rambo Weasley Sherlock
## [1,] 22573   52667    22427
## [2,] 42207   14973    21269
## [3,] 30225   23268    26403
## [4,] 37116   24387    47893
## [5,] 25277   22272    33279
## [6,] 31452   33373    42131
## [7,] 24711   41653    38181
## [8,] 29184   29837    28373
#1A
partA=function(green_team_players,red_team_players){
  cat("Part A: Team Roster Dashboard Prototype");  cat('\n');
  cat("---------------------------------------");  cat('\n');
  cat(paste('The green has',length(green_team_players),"players:"));cat('\n');
  cat("palyer");cat("\t");cat(format('name',width=8,justify="c"));cat("\n")
  for(i in 1:length(green_team_players)){
    cat(paste(i,"\t",green_team_players[i]),fill = T)
  }
  cat('\n')
  cat(paste('The red has',length(red_team_players),"players:"));cat('\n');
  cat("palyer");cat("\t");cat(format('name',width=8,justify="c"));cat("\n")
  for(i in 1:length(red_team_players)){
    cat(paste(i,"\t",red_team_players[i]),fill = T)
  }
}

partA(green_team_players,red_team_players)
## Part A: Team Roster Dashboard Prototype
## ---------------------------------------
## The green has 10 players:
## palyer     name  
## 1     Cupcake
## 2     Boss Level
## 3     Pudding
## 4     Watermelon
## 5     Ham Sandwich
## 6     Hot Sauce
## 7     Jelly Bean
## 8     Rambo
## 9     Weasley
## 10    Sherlock
## 
## The red has 10 players:
## palyer     name  
## 1     Einstein
## 2     Bond
## 3     Spock
## 4     Noob
## 5     Snake
## 6     Pineapple
## 7     Kirk
## 8     Worf
## 9     Bean
## 10    Mabel
format("Green",width=8,justify="c")
## [1] " Green  "

3.2 2

#2B
partB=function(scores_red,scores_green){
  cat("Part B: Game Summary Dashboard Protype A");  cat('\n');
  cat("---------------------------------------");  cat('\n');
  cat('\n');
  cat("Game");cat("\t");cat(format("Green",width=8,justify="c"));cat("\t");cat(format("Red",width=8,justify="c"));cat("\n")
  for(i in 1:nrow(scores_red)){
    cat(paste(i,"\t",as.character(format(round(mean(scores_green[i,]),0),big.mark=','),0),"\t",as.character(format(round(mean(scores_red[i,]),0),big.mark=','))),fill = T)
  }
}

partB(scores_red,scores_green)
## Part B: Game Summary Dashboard Protype A
## ---------------------------------------
## 
## Game  Green        Red   
## 1     34,048      32,185
## 2     34,843      32,342
## 3     34,392      27,114
## 4     28,898      35,117
## 5     27,672      32,431
## 6     31,569      32,035
## 7     33,505      31,476
## 8     31,134      24,705
#3C
partC=function(scores_red,scores_green){
  cat("Part C: Game Summary Dashboard Prototype B");  cat('\n');
  cat("---------------------------------------");  cat('\n');
  cat('\n');
  cat("Game");cat("\t");cat(format("Green",width=12,justify="c"));
  cat(format("Red",width=12,justify="c"));
  cat(format("Total",width=6,justify="c"));cat("\t");cat("Winner");cat("\n")
  for(i in 1:nrow(scores_red)){
    cat(paste(i,"\t",
        format(round(mean(scores_green[i,]),0),width=6,justify="c",big.mark=','),
        format(round(mean(scores_red[i,]),0),width=12,justify="c",big.mark=','),
        format(round(mean(scores_red[i,])+mean(scores_green[i,]),0),width=6,justify="c",big.mark=','),
        (if(mean(scores_red[i,])>mean(scores_green[i,]))'red' else 'green')),
        fill = T)
  }
}

partC(scores_red,scores_green)
## Part C: Game Summary Dashboard Prototype B
## ---------------------------------------
## 
## Game    Green        Red     Total   Winner
## 1      34,048        32,185  66,233 green
## 2      34,843        32,342  67,185 green
## 3      34,392        27,114  61,505 green
## 4      28,898        35,117  64,015 red
## 5      27,672        32,431  60,103 red
## 6      31,569        32,035  63,604 red
## 7      33,505        31,476  64,981 green
## 8      31,134        24,705  55,839 green
#4D
partD=function(scores_green,scores_red){ 
  t=0;
  MVP=names(which(colMeans(scores_green)==max(colMeans(scores_green))))
  stars=paste0(names(which(colMeans(scores_green)>=30000)))
  for(i in 1:nrow(scores_green)){
    if(mean(scores_red[i,])>mean(scores_green[i,])) t=t+1
    }
  cat("Part D: Green Team Post-Game Dashboard Prototype");cat('\n');
  cat("---------------------------------------");cat('\n');
  cat('\n');
  cat(paste0(format("Number of Playear:",width=30,justify="l"),ncol(scores_green)));cat('\n');
  cat(paste0(format("Games Won:",width=30,justify="l"), nrow(scores_green),"(",t/nrow(scores_green)*100,"%)"));cat('\n');
  cat(paste0(format("Average team:",width=30,justify="l"),mean(scores_green),"points"));cat('\n');
  cat(paste0(format("Average Individual Score:",width=30,justify="l"),mean(scores_green[,MVP]),"points"));cat('\n');
  cat(format("Team All-Stars:",width=28,justify="l"));cat(stars[stars!=MVP]);cat('\n');
  cat(paste0(format("Team MVP:",width=30,justify="l"),MVP,"(",sum(scores_green[,MVP]),"total points)"));cat('\n');
}

partD(scores_green,scores_red)
## Part D: Green Team Post-Game Dashboard Prototype
## ---------------------------------------
## 
## Number of Playear:            10
## Games Won:                    8(37.5%)
## Average team:                 32007.65points
## Average Individual Score:     37679.25points
## Team All-Stars:             Einstein Bond Spock Pineapple Bean Mabel
## Team MVP:                     Noob(301434total points)
#5E
partE=function(scores_green){
  d5=as.data.frame(scores_green)
  a1=format(sapply(d5,max),big.mark=',')
  a2=sapply(d5,which.max)
  Best_Score=paste0(a1,"(",a2,")")
  a3=format(sapply(d5,min),big.mark=',')
  a4=sapply(d5,which.min)
  Worst_Score=paste0(a3,"(",a4,")")
  Average=format(round(sapply(d5,mean),0),big.mark=',')
  Tolal=format(sapply(d5,sum),big.mark=',')
  a7=round(colSums(d5)/sum(d5)*100,2)
  Percent=paste0(a7,"%")
  a8=data.frame(Best_Score,Worst_Score,Average,Tolal,Percent)
  colSums(d5)/sum(d5)*100
  cat("Part E:Player Summary Dashboard Prototype");cat('\n');
  cat("---------------------------------------");cat('\n');
  cat(format("Player",width=12,justify="c"));cat(format(names(a8),width=10,justify="c"));cat('\n');
  for(i in 1:nrow(a8)){
    cat(format(row.names(a8)[i],width=12,justify="c"))
    for(j in 1:ncol(a8)){
      cat(format(as.character(a8[i,j]),width=12,justify="c"))
    }
    cat('\n')
  } 
  }

partE(scores_green)
## Part E:Player Summary Dashboard Prototype
## ---------------------------------------
##    Player   Best_Score  Worst_Score   Average      Tolal      Percent  
##   Einstein   42,046(7)   17,418(5)     33,271     266,171      10.39%   
##     Bond     44,572(2)    9,773(6)     30,598     244,788      9.56%    
##    Spock     44,619(7)    6,245(2)     33,218     265,743      10.38%   
##     Noob     56,700(1)   11,515(5)     37,679     301,434      11.77%   
##    Snake     37,148(2)   15,259(8)     26,852     214,815      8.39%    
##  Pineapple   45,705(3)   26,541(5)     33,905     271,238      10.59%   
##     Kirk     46,677(2)   13,360(4)     28,702     229,615      8.97%    
##     Worf     35,618(2)   21,147(6)     28,156     225,244       8.8%    
##     Bean     43,224(6)   23,467(3)     34,021     272,168      10.63%   
##    Mabel     36,988(6)   23,517(5)     33,674     269,396      10.52%
#6F
partF=function(scores_green,scores_red){
  d5=as.data.frame(scores_green)
  d5red=as.data.frame(scores_red)
  d6=cbind.data.frame(scores_green,scores_red)
  b1=sapply(d6, sum)
  rankname=names(sort(b1,decreasing = T))
  rankgreen=names(sort(colSums(d5),decreasing = T))
  rankred=names(sort(colSums(d5red),decreasing = T))
  hhgeern=data.frame(name=rankgreen,rank1=c(1:length(rankgreen)))
  hhred=data.frame(name=rankred,rank1=c(1:length(rankred)))
  hh1=rbind(hhgeern,hhred)
  hh2=data.frame(name=rankname,rank2=c(1:length(rankname)))
  tab6=merge(hh1,hh2)
  for(i in 1:nrow(tab6)){
    for (j in 1:ncol(tab6)) {
      if(nchar(tab6[i,j])<2)
        tab6[i,j]=paste0('0',tab6[i,j])
    }
  }
  tab66=data.frame(Toltal_Score=format(b1,big.mark=','),Team_rank=tab6$rank1,Overrall_rank=tab6$rank2)
  cat("Part F: Player Rank Dashboard Prototype");cat('\n');
  cat("---------------------------------------");cat('\n');
  cat(format("Player",width=12,justify="c"));cat(format(names(tab66),width=12,justify="c"));cat('\n');
  for(i in 1:nrow(tab66)){
    cat(format(row.names(tab66)[i],width=12,justify="c"))
    for(j in 1:ncol(tab66)){
      cat(format(as.character(tab66[i,j]),width=12,justify="c"))
    }
    cat('\n')
  }  
}
partF(scores_green,scores_red)
## Part F: Player Rank Dashboard Prototype
## ---------------------------------------
##    Player   Toltal_Score    Team_rank   Overrall_rank
##   Einstein    266,171        02          04     
##     Bond      244,788        07          12     
##    Spock      265,743        02          03     
##     Noob      301,434        01          02     
##    Snake      214,815        05          07     
##  Pineapple    271,238        09          18     
##     Kirk      229,615        08          15     
##     Worf      225,244        04          10     
##     Bean      272,168        08          16     
##    Mabel      269,396        04          06     
##   Cupcake     283,496        01          01     
##  Boss Level   277,375        03          05     
##   Pudding     254,203        05          11     
##  Watermelon   195,116        06          13     
## Ham Sandwich  223,622        03          09     
##  Hot Sauce    238,268        10          19     
##  Jelly Bean   256,836        06          08     
##    Rambo      242,745        10          20     
##   Weasley     242,430        07          14     
##   Sherlock    259,956        09          17

3.3 3

#####part1####
cereals=read_csv("gba-464-A6-breakfast-cereals-data.csv")

#####part2####
fun1=function(data){
cat("PART 2: EXPLORE");cat("\n");cat(paste0(rep("-",16),collapse = ''))
  cat('\n')
print(data[1:4,]);cat("\n")
cat(paste0('There are',' ',nrow(cereals),' ','rows in the data'))
cat('\n');cat('\n')
col_name=names(data)
for (i in 1:length(col_name)) {
  cat(i,'.',col_name[i],fill = T)
}
}
fun1(cereals)
## PART 2: EXPLORE
## ----------------
## # A tibble: 4 × 16
##     SHE TYP     CAL   RAT   PRO   SOD   POT NAM      VIT   WEI COM     CUP   FIB
##   <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>  <dbl> <dbl> <chr> <dbl> <dbl>
## 1     2 C       100  45.3     4   150    95 Life      25     1 Q      0.67     2
## 2     3 C       110  40.4     3   140   105 Clust…    25     1 G     NA        2
## 3     1 C       100  40.1     2   220    90 Multi…    25     1 G      1        2
## 4     3 C       150  37.1     4    95   170 Muesl…    25     1 R      1        3
## # ℹ 3 more variables: CAR <dbl>, FAT <dbl>, SUG <dbl>
## 
## There are 77 rows in the data
## 
## 1 . SHE
## 2 . TYP
## 3 . CAL
## 4 . RAT
## 5 . PRO
## 6 . SOD
## 7 . POT
## 8 . NAM
## 9 . VIT
## 10 . WEI
## 11 . COM
## 12 . CUP
## 13 . FIB
## 14 . CAR
## 15 . FAT
## 16 . SUG
#####part3####
names(cereals)=c('shelf','type','calories','rating','protein','sodium','potassium','name','vitamins','weight','company','cups','fiber','carbs','fat','sugar')
cereals<-cereals%>% 
mutate(shelfName=case_when(shelf==1~"BottomShelf",
                           shelf==2~"EyeLevel",
                           shelf==3~"TopShelf"
                           ))
cereals<-cereals %>% 
  mutate(caloriesPerCup=round(calories/cups),0) %>% 
  arrange(desc(caloriesPerCup))

cereals=cereals %>% mutate(company=case_when(company=='A'~"American Home Food Products",
                                         company=='G'~"General Mills",
                           company=='K'~"Kellogg's",
                           company=='N'~"Nabisco",
                           company=='P'~"Post",
                           company=='Q'~"Quaker Oats",
                           company=="R"~"Ralston Purina"))

cereals=cereals %>% mutate(type=case_when(type=='H'~"Hot",
                                         type=='C'~"Cold"
                                   ))

cereals$rating=round(cereals$rating,0)
library('gt')
a=cereals[1:10,] %>% 
select(company,name,type,shelfName,rating,
  caloriesPerCup,vitamins,sugar) %>% 
gt()
a <-a %>%tab_header(title = "Highest-Calorie Cereals")
a
Highest-Calorie Cereals
company name type shelfName rating caloriesPerCup vitamins sugar
Post Grape-Nuts Cold TopShelf 53 440 25 3
Post Great Grains Pecan Cold TopShelf 46 364 25 4
General Mills Oatmeal Raisin Crisp Cold TopShelf 30 260 25 10
Kellogg's Mueslix Crispy Blend Cold TopShelf 30 239 25 13
Kellogg's Cracklin' Oat Bran Cold TopShelf 40 220 25 7
Kellogg's All-Bran Cold TopShelf 59 212 25 5
Nabisco 100% Bran Cold TopShelf 68 212 25 6
Kellogg's Nutri-Grain Almond-Raisin Cold TopShelf 41 209 25 7
Quaker Oats Quaker Oat Squares Cold TopShelf 50 200 25 6
General Mills Raisin Nut Bran Cold TopShelf 40 200 25 8
#####part4####
b=cereals %>%group_by(type) %>% 
  summarise( averageCaloriesPerCup=mean(calories/cups,na.rm = TRUE))
c=cereals %>% 
  summarise( averageRating=mean(rating,na.rm = TRUE))

part4=function(b,c){
    cat("PART 4: ANALYZE");cat("\n");cat(paste0(rep("-",16),collapse = ''))
    cat('\n');cat('\n');cat('The average calories per of cup of cold cereal:')
    cat("\n");print(b[1,2])
    cat('\n');cat('The average calories per of cup of hot cereal:')
    cat("\n");print(b[2,2])
    cat('\n');cat('The average calories per of cup of hot cereal:')
    cat("\n");print(c)
}
part4(b,c)
## PART 4: ANALYZE
## ----------------
## 
## The average calories per of cup of cold cereal:
## # A tibble: 1 × 1
##   averageCaloriesPerCup
##                   <dbl>
## 1                  144.
## 
## The average calories per of cup of hot cereal:
## # A tibble: 1 × 1
##   averageCaloriesPerCup
##                   <dbl>
## 1                  116.
## 
## The average calories per of cup of hot cereal:
## # A tibble: 1 × 1
##   averageRating
##           <dbl>
## 1          42.8
cc1=cereals%>%filter(type=='Cold') %>% select(company,name,sugar)%>%
  arrange(desc(sugar)) 
cc1[1:10,]%>% gt() %>% tab_header(title = "Most Sugary cereals")
Most Sugary cereals
company name sugar
Kellogg's Smacks 15
Post Golden Crisp 15
Post Post Natural Raisin Bran 14
General Mills Total Raisin Bran 14
Kellogg's Apple Jacks 14
Kellogg's Mueslix Crispy Blend 13
General Mills Count Chocula 13
Kellogg's Froot Loops 13
General Mills Cocoa Puffs 13
Kellogg's Fruitful Bran 12
cc2=cereals%>%filter(type=='Cold') %>% select(company,name,rating)%>%
  arrange(rating) 
cc2[1:10,]%>% gt() %>% tab_header(title = "Lowest-rated Cereals")
Lowest-rated Cereals
company name rating
Quaker Oats Cap 'n' Crunch 18
General Mills Cinnamon Toast Crunch 20
General Mills Count Chocula 22
General Mills Cocoa Puffs 23
General Mills Golden Grahams 24
General Mills Lucky Charms 27
Post Fruity Pebbles 28
General Mills Trix 28
General Mills Total Raisin Bran 29
Post Honey-comb 29
cc3=cereals%>%filter(type=='Cold'&vitamins==0) %>% select(company,name,vitamins)%>%
  arrange(name)
cc3%>% gt() %>% tab_header(title = "Cereals with No Nutritional Value")
Cereals with No Nutritional Value
company name vitamins
Quaker Oats 100% Natural Bran 0
Quaker Oats Puffed Rice 0
Quaker Oats Puffed Wheat 0
Nabisco Shredded Wheat 0
Nabisco Shredded Wheat 'n' Bran 0
Nabisco Shredded Wheat Spoon Size 0
#####part5####
library(ggplot2)
ggplot(cereals, aes(x=sugar,y=rating))+
  geom_smooth(method = "lm", se=T,formula =y~x )+
  geom_point()+xlab("Grams of Sugar Per Serving")+
  ylab("Consumer Reposts Rating")

ggplot(cereals, aes(rating)) + 
  geom_histogram(binwidth = 5.1,color=1,fill=5)+ggtitle("Number of Cereals by Consumer Reports Rating") +
  theme(plot.title = element_text(hjust = 0.1,size =10))+xlab("Consumer Reports Rating")+
  ylab("Number of Cereals")

dat1=cereals %>% filter(rating>42.8) %>% select(rating,shelfName)
ggplot(dat1, aes(shelfName)) + 
  geom_bar(color=1,fill=5)+ggtitle("Number of Cereals with Above Average Rating by supermarket Shelf Placement") +
  theme(plot.title = element_text(hjust = 0.1,size =10))+xlab("Supermarket Shelf Placement
")+
  ylab("Number of Cereals")

ggplot(cereals, aes(y=caloriesPerCup)) + 
  geom_boxplot()+ggtitle("Box Plot for Cereal Calories Per Cup") +
  theme(plot.title = element_text(hjust = 0.1,size =10))

4 画图等

4.1 R Markdown

  1. 加粗
  2. 斜体
  3. 删除
  4. 斜体加粗的文字

我是蓝色

4.2 Slide with Bullets

  • Bullet 1
  • Bullet 2
  • Bullet 3
  • Bullet 4

4.3 表格1

姓名 性别 分数
小明 100
小红 89
小飞 88

4.4 表格2

cars %>% flextable::as_flextable()

speed

dist

numeric

numeric

4

2

4

10

7

4

7

22

8

16

9

10

10

18

10

26

10

34

11

17

n: 50

4.5 可查询表格

DT::datatable(mtcars)

4.6 时间动态图

library(patchwork)
mydat=tibble(a=rep(2000:2019,each=2),b=runif(40,1,100),c=sample(c('a','b'),40,replace = T))
p1<-ggplot(mydat,aes(x=a,y=b,col=c,group=c))+geom_line()+geom_point()
p2<-ggplot(mydat,aes(x=c,y=b,col=c,group=c))+geom_boxplot()
pp<-(p1/ plot_spacer())|p2
p1+transition_reveal(a)

ggplotly(p1)
ggplotly(p2)

4.7 3D玫瑰

x<- seq(0, 24) /24
t <- seq(0, 575, by = 0.5) / 575*20 *pi + 4 *pi
grid <- expand.grid(x = x, t = t)
x <- matrix(grid$x, ncol = 25, byrow = TRUE)
t <- matrix(grid$t, ncol = 25, byrow = TRUE)
p<- (pi/2)*exp(-t/(8*pi))
change <- sin(15 * t) /150
u<-1-(1-(3.6*t)%%(2*pi) /pi)^4/2+change
y <- 2*(x^2- x)^2* sin(p)
r<- u*(x*sin(p) +y *cos(p))
xx=r*cos(t)
yy=r*sin(t)
zz=u*(x*cos(p)-y*sin(p))
plot_ly(x = ~xx, y = ~yy, z = ~zz,color = ~zz, colors = 'Reds',opacity = 0.5)%>% add_surface()->plot
add_trace(plot,x=rep(0,4),y=rep(0,4),z=seq(-0.5,0,length=4), mode='lines', line = list(color = 'green', width = 8)) %>% 
  add_text(x=0,y=0,z=1,text="plot by chz",list(color = 'green', size = 8))

4.8 流程图

library(DiagrammeR)
grViz("
  digraph {
# initiate graph
graph [layout = dot, rankdir = LR, label = '研究路线\n\n',labelloc = t]

# global node settings
node [shape = rectangle, style = filled, fillcolor = Linen]

    A[label = '数据', shape = folder, fillcolor = Beige]
    B[label =  '预处理-\n选取,整合变量']
    C[label =  '欠采样\n 类别不平衡样本']
    D[label =  '朴素贝叶斯']
    E[label =  '逻辑回归']
    F[label =  '神经网络']
    G[label= 'gbm梯度提升']
    H[label= 'gbm提升模型\n参数优化']
    P[label= '1.准确率 \n 2.重要性 \n 3.ROC曲线']
    MOD[label= '最终模型',fillcolor = Beige]

    
blank1[label = '', width = 0.01, height = 0.01]   
# A -> blank1[dir=none];
# blank1 -> B[minlen=10];
#   {{ rank = same; blank1 B }}
# blank1 -> C
# blank2[label = '', width = 0.01, height = 0.01]   
# C -> blank2[dir=none];
# blank2 -> D[minlen=1];
#   {{ rank = same; blank2 E }}
# blank2 -> E [minlen=10]
    A->B
      {{ rank = same; A B }}
    B->C
    C->{D,E,F,G}
      {D,E,F,G}->P
    subgraph cluster_modules {
    label = '模型构建'
    color = red
    style = dashed
    # connect moderator to module 4
    {D,E,F,G}
        }
  P->H
    subgraph cluster_moderator {
    label = '模型评估'
    color = red
    style = dashed
    P}
    H->MOD
      {{ rank = same;H MOD }}
  }
")

4.9 自定义视频

4.10 动态图(点图)

as.tibble(EuStockMarkets) %>% mutate(year=c(1:1860)) %>% 
ggplot(aes(x=SMI,y=CAC)) +
geom_point(size=1,alpha=0.6,col='red') +
transition_states(year,
                  transition_length = 2,
                  state_length = 1)+
ease_aes('linear')+
ggtitle('Timepoints is {closest_state} \n VL(M) is ture?')+
  shadow_mark(size = 0.5, colour = 'lightblue')

4.11 地图leaflet

地图学习

df <- sp::SpatialPointsDataFrame(
  cbind(
    (runif(4,-0.5,0.5))/2 + 112.99,  # lng
    (runif(4,-0.5,0.5))/2 + 28.11  # lat
  ),
  data.frame(type = factor(
    rep(c("pirate", "ship"),2),
    c("ship", "pirate")
  ))
)

oceanIcons <- iconList(
  ship = makeIcon(iconUrl = "www/lp_lh1.png",
  iconWidth =50, iconHeight = 50),
  pirate = makeIcon(iconUrl = "www/lp_lh2.png",
  iconWidth =50, iconHeight = 50)
)
m<-leaflet() %>% 
  addTiles(group = "OSM (default)") %>% 
  setView(112.99, 28.11, zoom = 10) %>% 
  addMarkers(112.99, 28.11, popup="The birthplace of R",
             group = "1") %>%
  addCircleMarkers(112.99, 28.11,radius = 10, color = c('red'),
                   group = "2") %>% 
  addCircles(112.99, 28.11,weight = 3,radius = 10000, color =    c('red'),group = "3") %>% 
  addRectangles(
    lng1=113.2, lat1=28.3,lng2=112.8, lat2=27.9,fillColor = "yellow",group = "4") %>% 
  addMarkers(data=df,icon = ~oceanIcons[type],clusterOptions = markerClusterOptions(),group = "5") %>% 
    addLayersControl(
    baseGroups = c("OSM (default)"),
    overlayGroups = c("1", "2","3", "4","5"),
    options = layersControlOptions(collapsed = T,autoZIndex = TRUE)
  )
m

5 线性规划

5.1 问题

5.2 答案

# 1
FIB=c(1,1)
for(i in 3:100){
  FIB[i]=FIB[i-1]+FIB[i-2]
}
FIB
##   [1] 1.000000e+00 1.000000e+00 2.000000e+00 3.000000e+00 5.000000e+00
##   [6] 8.000000e+00 1.300000e+01 2.100000e+01 3.400000e+01 5.500000e+01
##  [11] 8.900000e+01 1.440000e+02 2.330000e+02 3.770000e+02 6.100000e+02
##  [16] 9.870000e+02 1.597000e+03 2.584000e+03 4.181000e+03 6.765000e+03
##  [21] 1.094600e+04 1.771100e+04 2.865700e+04 4.636800e+04 7.502500e+04
##  [26] 1.213930e+05 1.964180e+05 3.178110e+05 5.142290e+05 8.320400e+05
##  [31] 1.346269e+06 2.178309e+06 3.524578e+06 5.702887e+06 9.227465e+06
##  [36] 1.493035e+07 2.415782e+07 3.908817e+07 6.324599e+07 1.023342e+08
##  [41] 1.655801e+08 2.679143e+08 4.334944e+08 7.014087e+08 1.134903e+09
##  [46] 1.836312e+09 2.971215e+09 4.807527e+09 7.778742e+09 1.258627e+10
##  [51] 2.036501e+10 3.295128e+10 5.331629e+10 8.626757e+10 1.395839e+11
##  [56] 2.258514e+11 3.654353e+11 5.912867e+11 9.567220e+11 1.548009e+12
##  [61] 2.504731e+12 4.052740e+12 6.557470e+12 1.061021e+13 1.716768e+13
##  [66] 2.777789e+13 4.494557e+13 7.272346e+13 1.176690e+14 1.903925e+14
##  [71] 3.080615e+14 4.984540e+14 8.065155e+14 1.304970e+15 2.111485e+15
##  [76] 3.416455e+15 5.527940e+15 8.944394e+15 1.447233e+16 2.341673e+16
##  [81] 3.788906e+16 6.130579e+16 9.919485e+16 1.605006e+17 2.596955e+17
##  [86] 4.201961e+17 6.798916e+17 1.100088e+18 1.779979e+18 2.880067e+18
##  [91] 4.660047e+18 7.540114e+18 1.220016e+19 1.974027e+19 3.194043e+19
##  [96] 5.168071e+19 8.362114e+19 1.353019e+20 2.189230e+20 3.542248e+20
# There are too many digits to display

# 2
FIBMAT=matrix(FIB,byrow =T,ncol=10)

# 3
for(i in 1:nrow(FIBMAT)){
  FIBMAT[i,i]=FIBMAT[i,i]^2
}
# 4
hist(diag(FIBMAT),col="red")

# The distribution is mainly concentrated in the small value, 
# there is a small number of maximum value, jump growth

# 5
t.test(FIBMAT[,1],FIBMAT[,2])
## 
##  Welch Two Sample t-test
## 
## data:  FIBMAT[, 1] and FIBMAT[, 2]
## t = -0.32787, df = 15, p-value = 0.7475
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.178012e+18  1.597276e+18
## sample estimates:
##    mean of x    mean of y 
## 4.698246e+17 7.601922e+17
# p=0.7475,do not reject H0, no diff

# 6
Price=readxl::read_xlsx('data.xlsx',skip = 11)
Price=as.data.frame(Price)
colnames(Price)[4]="A"
head(Price,6)
##   Year Period    Label     A
## 1 2002    M01 2002 Jan 173.2
## 2 2002    M02 2002 Feb 173.7
## 3 2002    M03 2002 Mar 174.7
## 4 2002    M04 2002 Apr 175.8
## 5 2002    M05 2002 May 175.8
## 6 2002    M06 2002 Jun 175.9
# 7
Price=cbind(T=c(1:240),Price)
plot(Price$T,Price$A,type="l")
lines(Price$T,Price$T*0.355+176.8)

# 8
dat=Price$A#ts data
AM=c()
for(i in 7:length(dat)){
  AM[i-6]=mean(dat[i-6:i])
}
head(AM) #The prediction begins with the 7 phase
## [1] 173.2000 173.4500 173.8667 174.3500 174.6400 174.8500
# 9
xl1=dat[-239:-240]#Yn-2
xl2=dat[c(-1,-240)]#Yn-1
xl=dat[-1:-2]#Y The prediction begins with the third phase
myfun<-function(myvalues){x1<-myvalues[1];x2<-myvalues[2];x3<-myvalues[3]; 
y=x1*xl1+x2*xl2+x3
e=abs(xl-y);sum(e)}
AR<-optim(c(1,1,1), myfun)
AR$par#Model coefficient
## [1] -0.5628388  1.5582601  1.2166127
# 10
# AR
mean(abs(xl1*(-0.5628388)+xl2*1.5582601+1.5582601-xl))
## [1] 0.7074681
# AM
mean(abs(AM-dat[-1:-6]))
## [1] 23.59867
# AR is better

6 免费书籍学习

6.1 所有

bookdown

6.2 链接1

ss

6.3 链接2


ss

7 赞助

wx
zfb